# want the wide data for degree-related 
egodat <- readRDS("~/nsfg_data_cleaning/Objects/fullEgodata.rds")

egosvy <- as_survey_design(egodat$egos, ids=ego, weights = weight)
##### make ego data objects #########
## mar/coh
datMarcoh <- egodata(egos=egodat$egos, alters=egodat$altersMarCoh, egoWt = egodat$egos$weight, egoIDcol = "ego")
## others 
datOther <- egodata(egos=egodat$egos, alters=egodat$altersOther, egoWt = egodat$egos$weight, egoIDcol = "ego")
## one times 
#datOTs <- egodata(egos=egodat$egos, alters=egodat$altersOnce, egoWt = egodat$egos$weight, egoIDcol = "ego")
# want the long data for absdiff age, sex 
# this data exclusively contains relationships - no inactive egos 
long <- readRDS("~/nsfg_data_cleaning/Objects/altersegos_survdat.rds")
longsvy <- as_survey_design(long,  ids=ego, weights = weight)

1 Degree Dist

1.1 Overall

degs <- egosvy %>% 
        mutate(sex=as.factor(sex), deg.marcoh=as.factor(deg.marcoh), deg.other=as.factor(deg.other)) %>%
        group_by(sex, deg.marcoh, deg.other, .drop = FALSE) %>% 
        summarize(n = survey_total()) %>% 
        group_by(sex) %>%
        mutate(prop = n/sum(n))

fdeg <- degs %>% 
          filter(sex=="F") %>% 
          select(-n, -n_se) %>% 
          pivot_wider(names_from = deg.other, values_from=prop) 
fdeg <- fdeg[,-1]

mdeg <- degs %>% 
          filter(sex=="M") %>% 
          select(-n, -n_se) %>% 
          pivot_wider(names_from = deg.other, values_from=prop) 
mdeg <- mdeg[,-1]

kable(fdeg, col.names = c("Degree Marcoh", "Deg Other=0", "Deg Other=1", "Deg Other=2", "Deg Other=3"),
      caption="Females") %>% kable_styling(full_width = F)
Females
Degree Marcoh Deg Other=0 Deg Other=1 Deg Other=2 Deg Other=3
0 0.4055612 0.1447793 0.0029561 0.0006287
1 0.4445246 0.0015368 0.0000134 0.0000000
kable(mdeg, col.names = c("Degree Marcoh", "Deg Other=0", "Deg Other=1", "Deg Other=2", "Deg Other=3"),
      caption="Males") %>% kable_styling(full_width = F)
Males
Degree Marcoh Deg Other=0 Deg Other=1 Deg Other=2 Deg Other=3
0 0.3848753 0.1409088 0.0079454 0.0014774
1 0.4615956 0.0030967 0.0001008 0.0000000

1.2 Marriage / Cohabs

Takeaways:
- Fs in fewer marriage/cohabs than men likely due to age boundary 45
- important heterogeneity by race (lower among black and others), age
- hispanic/white look similar

marcoh <- egosvy %>% 
        mutate(sex=as.factor(sex), deg.marcoh=as.factor(deg.marcoh), age=as.factor(age)) %>%
        group_by(sex, age, deg.marcoh, .drop = FALSE) %>% 
        summarize(n = survey_total()) %>% 
        group_by(sex, age) %>%
        mutate(prop = n/sum(n)) %>%
        filter(deg.marcoh==1) %>%
        select(-n, -n_se) %>%
        ggplot(aes(x=age, y=prop, fill=sex)) +
        geom_col(position="dodge") +
        ggtitle("Mean Degree of Marriage/Cohab by Sex & Age")
ggplotly(marcoh)
degreedist(datMarcoh)

degreedist(datMarcoh, by="sex")

degreedist(datMarcoh, by="race")

degreedist(datMarcoh, by="agecat")

degreedist(datMarcoh, by="age")

1.3 Casual

Takeaways: - important heterogeneity by race (higher in black, lower in others), age
- hispanic/white looks similar
- very small sex differences - age boundary not as much an issue here

casual <- egosvy %>% 
        mutate(sex=as.factor(sex), deg.other=as.factor(deg.other), age=as.factor(age)) %>%
        group_by(sex, age, deg.other, .drop = FALSE) %>% 
        summarize(n = survey_total()) %>% 
        group_by(sex, age) %>%
        mutate(prop = n/sum(n)) %>%
        filter(deg.other != 0) %>%
        select(-n, -n_se) %>%
        ggplot(aes(x=age, y=prop, fill=sex)) +
        geom_col(position="dodge") +
        facet_wrap(~deg.other)
        ggtitle("Mean Degree of Casual by Sex & Age")
## $title
## [1] "Mean Degree of Casual by Sex & Age"
## 
## attr(,"class")
## [1] "labels"
ggplotly(casual)
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

1.4 Concurrency

# two types
# 1: between-network (marriage/cohab & casual)
# 2: intra-netwokr (casual & casual)

egosvy %>% 
        mutate(sex=as.factor(sex), deg.other=as.factor(deg.other>=1), deg.marcoh = as.factor(deg.marcoh), age=as.factor(age)) %>%
        group_by(sex, age, deg.marcoh, deg.other, .drop = FALSE) %>% 
        summarize(n = survey_total()) %>% 
        group_by(sex, age) %>%
        mutate(prop = n/sum(n)) %>%
        filter(deg.other == TRUE & deg.marcoh != 0) %>%
        select(-n, -n_se) %>%
        ggplot(aes(x=age, y=prop, fill=sex)) +
        geom_col(position="dodge") +
        ggtitle("proportion of egos w/ between-network concurrency by sex & age")
## Warning: Removed 1 rows containing missing values (geom_col).

bet <- egosvy %>% 
        mutate(sex=as.factor(sex), deg.other=as.factor(deg.other>=1), deg.marcoh = as.factor(deg.marcoh), age=as.factor(age)) %>%
        group_by(sex, age, deg.marcoh, deg.other, .drop = FALSE) %>% 
        summarize(n = survey_total()) %>% 
        group_by(sex, age) %>%
        mutate(prop = n/sum(n)) %>%
        filter(deg.other == TRUE & deg.marcoh != 0) %>%
        select(-n, -n_se) %>%
        group_by(sex) %>%
        mutate(mean=mean(prop, na.rm = T))

means <- data.frame(Sex = c("Females", "Males"), Mean = unique(bet$mean))

kable(means, caption = "Mean proportion of egos w/ cross-network concurrency by sex") %>%
  kable_styling(full_width = F)
Mean proportion of egos w/ cross-network concurrency by sex
Sex Mean
Females 0.0015149
Males 0.0033353
egosvy %>% 
        mutate(sex=as.factor(sex), deg.other=as.factor(deg.other>=2), age=as.factor(age)) %>%
        group_by(sex, age, deg.other, .drop = FALSE) %>% 
        summarize(n = survey_total()) %>% 
        group_by(sex, age) %>%
        mutate(prop = n/sum(n)) %>%
        filter(deg.other == TRUE) %>%
        select(-n, -n_se) %>%
        ggplot(aes(x=age, y=prop, fill=sex)) +
        geom_col(position="dodge") +
        ggtitle("proportion of egos w/ within-casual-network concurrency by sex & age")

int <- egosvy %>% 
        mutate(sex=as.factor(sex), deg.other=as.factor(deg.other>1), age=as.factor(age)) %>%
        group_by(sex, age, deg.other, .drop = FALSE) %>% 
        summarize(n = survey_total()) %>% 
        group_by(sex, age) %>%
        mutate(prop = n/sum(n)) %>%
        filter(deg.other == TRUE) %>%
        select(-n, -n_se) %>%
        group_by(sex) %>%
        mutate(mean=mean(prop, na.rm = T))

means <- data.frame(Sex = c("Females", "Males"), Mean = unique(int$mean))

kable(means, caption = "Mean proportion of egos w/ casual-network concurrency by sex") %>%
  kable_styling(full_width = F)
Mean proportion of egos w/ casual-network concurrency by sex
Sex Mean
Females 0.0036151
Males 0.0094191

2 Age Mixing

2.1 Marriages / Cohabs

2.1.1 Diff Age

2.1.2 Diff Sqrt Age

2.1.3 Diff Cube Root Age

2.2 Others

2.2.1 Diff Age

2.2.2 Diff Sqrt Age

2.2.3 Diff Cube Root Age

2.2.4 One-Times

this needs some work

agemixF <- round(mixingmatrix(datOTs[datOTs$egos$sex %in% "F"], "agecat", rowprob = T), 3)
agemixF <- agemixF[1:6,]
amF <- melt(agemixF)
amF$sex <- "F"

agemixM <- round(mixingmatrix(datOTs[datOTs$egos$sex %in% "M"], "agecat", rowprob = T), 3)
agemixM <- agemixM[1:6,]
amM <- melt(agemixM)
amM$sex <- "M"

am <-  rbind(amF, amM)

am %>% ggplot(aes(ego, alter)) + 
  geom_point(color="blue", alpha=0.2, aes(size=value)) +
  scale_size_area(max_size = 30) +
  geom_text(aes(label=round(value,2), size=0.015)) +
  theme(legend.position="none",
        axis.text.x = element_text(angle=45)) +
  coord_flip() +
  labs(title = "Age Mixing - One-Times") +
  facet_wrap(~sex, ncol = 2)

2.3 Race Mixing

2.3.1 Mar/Cohs

2.3.2 Other

2.4 One-Times

one-times based on Male egos reports of partner race if one-time partner was most recent female one-times partner race based on male reports